In [ ]:
# @title Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# @title Importing datasets
# The morbidity export is a semicolon-separated CSV file.
covid = pd.read_csv(
    'covid_19_morbidity_data.csv',
    sep=';',
)
# The vaccination records are stored in an Excel workbook.
vaccination = pd.read_excel(
    'vaccination_data.xlsx',
)
In [ ]:
# @title Renaming variables/ datasets
# Translate the Latvian column headers into lowercase, underscore-separated
# English names. Each mapping is applied right after it is defined; headers
# that are not listed in a mapping keep their original name.

# --- vaccination dataset ---
vaccination_col_mapping = {
    'Vakcinācijas iestādes kods': 'vaccination_facility_code',
    'Vakcinācijas iestādes nosaukums': 'vaccination_facility_name',
    'Vakcinācijas datums': 'vaccination_date',
    'Vakcīnas veids': 'vaccine_type',
    'Preparāts': 'preparation',
    'Vakcinācijas posms': 'vaccination_stage',
    'Vakcīnas kārtas numurs': 'vaccine_series_number',
    'Preparāta daudzums ml': 'preparation_amount_ml',
    'Vakcīnas ievadīšanas veids': 'vaccine_administration_method',
    'Indikācijas vakcinācijai': 'vaccination_indications',
    'Vakcinētās personas vecums': 'vaccinated_person_age',
    'Vakcinētās personas dzimums': 'vaccinated_person_gender',
    'Vakcinēto personu skaits': 'number_of_vaccinated_persons'
}
vaccination = vaccination.rename(columns=vaccination_col_mapping)

# --- covid dataset ---
# NOTE(review): the English names chosen for the *_NevakcVakcNepab,
# *_VakcNepab and *_Nevakc columns do not obviously match the source
# abbreviations - confirm against the publisher's data dictionary.
covid_col_mapping = {
    'Datums': 'date',
    'TestuSkaits': 'number_of_tests',
    'ApstiprinataCOVID19InfekcijaSkaits': 'confirmed_covid19_cases',
    'Ipatsvars': 'proportion',
    'IzarstetoPacientuSkaits': 'number_of_recovered_patients',
    'MirusoPersonuSkaits': 'number_of_deaths',
    'ApstiprinatiVecGr_0-9Gadi': 'confirmed_age_group_0_9_years',
    'ApstiprinatiVecGr_10-19Gadi': 'confirmed_age_group_10_19_years',
    'ApstiprinatiVecGr_20-29Gadi': 'confirmed_age_group_20_29_years',
    'ApstiprinatiVecGr_30-39Gadi': 'confirmed_age_group_30_39_years',
    'ApstiprinatiVecGr_40-49Gadi': 'confirmed_age_group_40_49_years',
    'ApstiprinatiVecGr_50-59Gadi': 'confirmed_age_group_50_59_years',
    'ApstiprinatiVecGr_60-69Gadi': 'confirmed_age_group_60_69_years',
    'ApstiprinatiVecGr_70GadiUnVairak': 'confirmed_age_group_70_and_older',
    'ApstiprinatiVecGr_70-79Gadi': 'confirmed_age_group_70_79_years',
    'ApstiprinatiVecGr_80GadiUnVairak': 'confirmed_age_group_80_and_older',
    'IzveselojusosSkaits': 'number_of_recovered_cases',
    '14DienuKumulativaSaslimstibaUz100000Iedzivotaju': '14_day_cumulative_infection_rate_per_100000_inhabitants',
    'ApstCOVID19InfSk_NevakcVakcNepab': 'confirmed_covid19_cases_unvaccinated',
    'ApstCOVID19InfSk_Vakc': 'confirmed_covid19_cases_vaccinated',
    'ApstCOVID19InfSk_VakcNepab': 'confirmed_covid19_cases_vaccinated_unvaccinated',
    'ApstCOVID19InfSk_Nevakc': 'confirmed_covid19_cases_unvaccinated_total',
    'MirusoPersonuSkaits_NevakcVakcNepab': 'number_of_deaths_unvaccinated',
    'MirusoPersonuSkaits_Vakc': 'number_of_deaths_vaccinated'
}
covid = covid.rename(columns=covid_col_mapping)
In [ ]:
# @title Preliminary summary stats of datasets
datasets = {'vaccination': vaccination,
            'covid': covid}

# The table header is the same for every dataset, so build it once
header = f"{'Column Name':<60} {'# Unique Values':<20} {'# Missing Values':<20} {'Data Type':<15}"

for dataset, frame in datasets.items():
    # dataset name followed by its (rows, columns) shape
    print(f"\nDataset: {dataset}")
    print(f"Size: {frame.shape}")

    # table header and separator rule
    print(header)
    print("-" * 115)

    # one row per column: distinct values, missing values and dtype
    for col in frame.columns:
        series = frame[col]
        n_unique = series.nunique()
        missing_values = series.isnull().sum()
        data_type = str(series.dtype)
        print(f"{col:<60} {n_unique:<20} {missing_values:<20} {data_type:<15}")
Dataset: vaccination
Size: (1014924, 13)
Column Name                                                  # Unique Values      # Missing Values     Data Type      
-------------------------------------------------------------------------------------------------------------------
vaccination_facility_code                                    791                  0                    int64          
vaccination_facility_name                                    790                  0                    object         
vaccination_date                                             280                  0                    datetime64[ns] 
vaccine_type                                                 1                    0                    object         
preparation                                                  7                    0                    object         
vaccination_stage                                            5                    0                    object         
vaccine_series_number                                        10                   0                    int64          
preparation_amount_ml                                        4                    0                    float64        
vaccine_administration_method                                1                    0                    object         
vaccination_indications                                      26                   0                    object         
vaccinated_person_age                                        95                   0                    int64          
vaccinated_person_gender                                     2                    0                    object         
number_of_vaccinated_persons                                 73                   0                    int64          

Dataset: covid
Size: (1542, 24)
Column Name                                                  # Unique Values      # Missing Values     Data Type      
-------------------------------------------------------------------------------------------------------------------
date                                                         1542                 0                    object         
number_of_tests                                              1196                 0                    int64          
confirmed_covid19_cases                                      676                  0                    int64          
proportion                                                   384                  0                    float64        
number_of_recovered_patients                                 42                   0                    int64          
number_of_deaths                                             49                   0                    int64          
confirmed_age_group_0_9_years                                224                  0                    object         
confirmed_age_group_10_19_years                              269                  0                    object         
confirmed_age_group_20_29_years                              263                  0                    object         
confirmed_age_group_30_39_years                              348                  0                    object         
confirmed_age_group_40_49_years                              329                  0                    object         
confirmed_age_group_50_59_years                              323                  0                    object         
confirmed_age_group_60_69_years                              277                  0                    object         
confirmed_age_group_70_and_older                             244                  0                    object         
confirmed_age_group_70_79_years                              191                  0                    object         
confirmed_age_group_80_and_older                             129                  0                    object         
number_of_recovered_cases                                    964                  3                    object         
14_day_cumulative_infection_rate_per_100000_inhabitants      955                  0                    object         
confirmed_covid19_cases_unvaccinated                         83                   0                    object         
confirmed_covid19_cases_vaccinated                           407                  0                    object         
confirmed_covid19_cases_vaccinated_unvaccinated              168                  0                    object         
confirmed_covid19_cases_unvaccinated_total                   336                  0                    object         
number_of_deaths_unvaccinated                                39                   0                    object         
number_of_deaths_vaccinated                                  14                   0                    object         

Data Cleaning¶

In [ ]:
# @title Covid incidence data
# parse the date column so it can be compared and plotted chronologically
covid['date'] = pd.to_datetime(covid['date'])

# missing figures are written as an ellipsis (three dots or the single
# '…' character); swap those placeholders for 0 in every column
missing_markers = ['...', '…']
for col in covid.columns:
    covid[col] = covid[col].replace(missing_markers, 0)

# genuine NaN values are treated as 0 as well
covid.fillna(0, inplace=True)

# any column still typed as object is cast to float
object_columns = [col for col in covid.columns if covid[col].dtype == 'object']
for col in object_columns:
    covid[col] = covid[col].astype('float64')

# restrict the analysis to dates up to the end of 2022:
# after 2022 there is no data by age groups,
# recovered cases are not reported after 2023-06,
# and Covid-19 incidence drops significantly
within_period = covid['date'] <= '2022-12-31'
covid = covid[within_period]
In [ ]:
# @title Vaccination data

# convert vaccination_date to datetime
vaccination['vaccination_date'] = pd.to_datetime(vaccination['vaccination_date'])

# replace vaccinated_person_gender with binary 0 for men ('V') and 1 for women ('S')
vaccination['vaccinated_person_gender'] = vaccination['vaccinated_person_gender'].replace({'V': 0, 'S': 1}).astype(int)

# get the year-month for each date
vaccination['month'] = vaccination['vaccination_date'].dt.strftime('%Y-%m')

# get the calendar week for each date:
# week_start is the Monday of that week (dayofweek is 0 on Mondays),
# week_end is the following Sunday, and 'week' is a "start - end" text label
vaccination['week_start'] = vaccination['vaccination_date'] - pd.to_timedelta(vaccination['vaccination_date'].dt.dayofweek, unit='d')
vaccination['week_end'] = vaccination['week_start'] + pd.Timedelta(days=6)
vaccination['week'] = vaccination['week_start'].dt.strftime('%Y-%m-%d') + ' - ' + vaccination['week_end'].dt.strftime('%Y-%m-%d')

# create age categories
# The bins are right-closed, e.g. (19, 29] -> '20-29'. The last bin is
# open-ended so that everyone older than 69 falls into '70+' (a fixed upper
# edge of 100 would turn older ages into NaN), and include_lowest=True keeps
# age 0 inside the first bin instead of dropping it.
vaccination['age_category'] = pd.cut(
    vaccination['vaccinated_person_age'],
    bins=[0, 19, 29, 39, 49, 59, 69, float('inf')],
    labels=['0-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+'],
    include_lowest=True)
<ipython-input-6-5a8af02ddc19>:7: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  vaccination['vaccinated_person_gender'] = vaccination['vaccinated_person_gender'].replace({'V': 0, 'S': 1}).astype(int)
In [ ]:
# Report the first and last date covered by each dataset (YYYY-MM-DD)
day_format = '%Y-%m-%d'
covid_first, covid_last = covid['date'].min(), covid['date'].max()
vaccination_first = vaccination['vaccination_date'].min()
vaccination_last = vaccination['vaccination_date'].max()
print(f"Covid incidence data from {covid_first.strftime(day_format)} until {covid_last.strftime(day_format)}")
print(f"Vaccination data from {vaccination_first.strftime(day_format)} until {vaccination_last.strftime(day_format)}")
Covid incidence data from 2020-02-29 until 2022-12-31
Vaccination data from 2020-12-04 until 2021-09-19

New Attributes¶

In [ ]:
# @title Covid incidence data
# Derive the cumulative series and rates on a fresh frame. The keyword
# arguments of assign() are evaluated in order, so later columns can use the
# ones created just above them.
covid = (
    covid
    # number of recovered cases is already a cumulative count (it seems there
    # is a break in series where # of recovered jump from 1.2K to 13K),
    # so the column is only renamed
    .rename(columns={'number_of_recovered_cases': 'cumulative_recovered_cases'})
    .assign(
        # running total of confirmed cases
        cumulative_confirmed_covid19_cases=lambda df: df['confirmed_covid19_cases'].cumsum(),
        # running total of deaths
        cumulative_number_of_deaths=lambda df: df['number_of_deaths'].cumsum(),
        # active = confirmed so far - recovered so far - deaths so far
        active_cases=lambda df: (
            df['cumulative_confirmed_covid19_cases']
            - df['cumulative_recovered_cases']
            - df['cumulative_number_of_deaths']
        ),
        # running total of tests
        cumulative_num_tests=lambda df: df['number_of_tests'].cumsum(),
        # morbidity rate, %: cumulative cases vs. cumulative tests
        morbidity_rate=lambda df: df['cumulative_confirmed_covid19_cases'] / df['cumulative_num_tests'] * 100,
        # mortality rate, %: cumulative deaths vs. cumulative cases
        mortality_rate=lambda df: df['cumulative_number_of_deaths'] / df['cumulative_confirmed_covid19_cases'] * 100,
        # daily recovered cases: row-to-row change of the cumulative count
        # (the first row has no predecessor and is therefore NaN)
        recovered_cases=lambda df: df['cumulative_recovered_cases'].diff(),
    )
)

Setting Chart Style¶

In [ ]:
# @title Setting style for charts
import warnings
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import logging

# Silence every Python warning from here on (this hides all categories,
# not only plotting-related ones)
warnings.filterwarnings("ignore")

# Let only ERROR-level (and above) records from the matplotlib logger through,
# which hides its font-related and other warnings
logging.getLogger('matplotlib').setLevel(logging.ERROR)

# Global typography: serif family, with Liberation Serif as the only
# candidate font listed
mpl.rcParams["font.family"] = "serif"
mpl.rcParams["font.serif"] = ["Liberation Serif"]
mpl.rcParams["font.weight"] = "normal"

# Global text sizes for titles, axis labels and tick labels
mpl.rcParams["axes.titlesize"] = 16
mpl.rcParams["axes.labelsize"] = 12
mpl.rcParams["xtick.labelsize"] = 10
mpl.rcParams["ytick.labelsize"] = 10

# Ten evenly spaced colours sampled from the Viridis colormap
viridis_colors = plt.cm.viridis(np.linspace(0, 1, 10))

Data Exploration¶

In [ ]:
import matplotlib.dates as mdates

# @title Active Cases
fig, ax = plt.subplots(figsize=(10, 6))

# Draw the active-case series in one of the shared Viridis shades
ax.plot(covid['date'], covid['active_cases'], label='Active Cases', color=viridis_colors[6])

# Place a major tick every third month and print it as year-month
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# Slant the tick labels for readability
plt.xticks(rotation=45, ha='right')

# Axis captions and chart title
ax.set(xlabel='Month', ylabel='Number of Cases', title='Active Cases')

# Keep only the bottom spine visible
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_visible(True)

plt.show()
No description has been provided for this image
In [ ]:
# @title Morbidity and Mortality Rates (%) Over Time
import matplotlib.dates as mdates

# create a plot
fig, ax = plt.subplots(figsize=(10, 6))

# series to draw: legend label, source column and index into the Viridis palette
labels = ['Morbidity rate, %', 'Mortality rate, %']
cols = ['morbidity_rate', 'mortality_rate']
color_indices = [0, 5]  # Adjust indices as needed for desired colors in Viridis palette

# draw one line per series
for label, col, color_index in zip(labels, cols, color_indices):
    ax.plot(covid['date'], covid[col], label=label, color=viridis_colors[color_index])

# the two lines share one axis, so a legend is needed to tell them apart
ax.legend()

# format axis date as year-month
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# show every 3 months
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))

# rotate the tick labels
plt.xticks(rotation=45, ha='right')

# set axis labels and title
ax.set_xlabel('Month')
ax.set_ylabel('Rate, %')
ax.set_title('Morbidity and Mortality Rates')

# Customize the chart by hiding all spines except the bottom
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_visible(True)

# save the figure before showing it, so the export does not depend on the
# figure still being alive after plt.show()
fig.savefig('morbidity_mortality_rates.png', dpi=300, bbox_inches='tight')

plt.show()
No description has been provided for this image
In [ ]:
# @title Share of Positive Daily Test Results
import matplotlib.dates as mdates

# One figure with a single axes for the daily series
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the 'proportion' column against the date in a Viridis shade
ax.plot(covid['date'], covid['proportion'], color=viridis_colors[2])

# Axis captions and chart title
ax.set(
    xlabel='Date',
    ylabel='Share of positive test results (%)',
    title='Share of Positive Daily Test Results',
)

# A major tick every third month, printed as year-month and slanted
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45, ha='right')

# Keep only the bottom spine visible
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_visible(True)

plt.show()
No description has been provided for this image
In [ ]:
# Covid incidence by age groups - which age groups had the highest infection rates?
# Track changes in infection rates over time to see if vaccination impacts are noticeable in certain age brackets
In [ ]:
# @title Monthly Covid Incidence by Age
import pandas as pd
import matplotlib.pyplot as plt

# Make sure 'date' is a datetime column (no change if it already is)
covid['date'] = pd.to_datetime(covid['date'])

# Short chart label for every age-group column
# NOTE(review): the dataset also has separate 70-79 and 80+ columns that are
# not included here - confirm the '70+' column covers the whole period.
age_labels = {
    'confirmed_age_group_0_9_years': '0-9',
    'confirmed_age_group_10_19_years': '10-19',
    'confirmed_age_group_20_29_years': '20-29',
    'confirmed_age_group_30_39_years': '30-39',
    'confirmed_age_group_40_49_years': '40-49',
    'confirmed_age_group_50_59_years': '50-59',
    'confirmed_age_group_60_69_years': '60-69',
    'confirmed_age_group_70_and_older': '70+'
}
# the original column names, in the same order as the labels
age_columns = list(age_labels)
covid = covid.rename(columns=age_labels)

# Sum the daily counts per calendar month to avoid overcrowding the plot
covid['month'] = covid['date'].dt.to_period('M')
covid_monthly = covid.groupby('month')[list(age_labels.values())].sum()

# First and last month of the vaccination data, as monthly periods
vaccination_start = pd.Period('2020-12', freq='M')
vaccination_end = pd.Period('2021-09', freq='M')

# Stacked bars: one bar per month, one segment per age group
fig, ax = plt.subplots(figsize=(12, 8))
covid_monthly.plot.bar(stacked=True, ax=ax, colormap="viridis", width=0.8)

# Title, axis captions and a legend placed outside the plotting area
ax.set(
    title='Monthly COVID-19 Incidence by Age Group',
    xlabel='Month',
    ylabel='Number of Confirmed Cases',
)
ax.legend(title="Age Groups", bbox_to_anchor=(1.05, 1), loc='upper left')

# Bars sit at integer positions 0, 1, 2, ..., so a month's x-position is its
# distance in months from the first month in the table
first_month = covid_monthly.index[0]
start_x = (vaccination_start - first_month).n
end_x = (vaccination_end - first_month).n

# Dashed grey markers for the vaccination period
for x_pos in (start_x, end_x):
    ax.axvline(x=x_pos, color='grey', linestyle='--', linewidth=1)

# Captions for the markers, placed halfway up the y-axis
y_middle = ax.get_ylim()[1] / 2
ax.text(start_x, y_middle, 'Vaccination Start\n2020-12', color='grey', ha='center', rotation=90)
ax.text(end_x, y_middle, 'Vaccination End\n2021-09', color='grey', ha='center', rotation=90)

# Hide the top, right and left spines
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)

# Tick marks only on the bottom and left sides
ax.tick_params(axis='x', bottom=True, top=False)
ax.tick_params(axis='y', left=True, right=False)

# Slant the month labels, tighten the layout and render
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title COVID-19 Infection Rates by Age Group Over Time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Convert 'date' to datetime if it's not already
covid['date'] = pd.to_datetime(covid['date'])

# Short chart label for every age-group column
age_labels = {
    'confirmed_age_group_0_9_years': '0-9',
    'confirmed_age_group_10_19_years': '10-19',
    'confirmed_age_group_20_29_years': '20-29',
    'confirmed_age_group_30_39_years': '30-39',
    'confirmed_age_group_40_49_years': '40-49',
    'confirmed_age_group_50_59_years': '50-59',
    'confirmed_age_group_60_69_years': '60-69',
    'confirmed_age_group_70_and_older': '70+'
}
# the original column names, in the same order as the labels
age_columns = list(age_labels)
# has no effect if the columns already carry the short labels
covid = covid.rename(columns=age_labels)

# Sum the daily counts per calendar month to avoid overcrowding the plot
covid['month'] = covid['date'].dt.to_period('M')
covid_monthly = covid.groupby('month')[list(age_labels.values())].sum()

# Define population estimates for 2022 based on provided data
population_estimates = {
    '0-9': 200109,
    '10-19': 192730,
    '20-29': 180063,
    '30-39': 268339,
    '40-49': 250110,
    '50-59': 257933,
    '60-69': 247103,
    '70+': 279370
}

# Calculate infection rates per 100,000 for each age group.
# Dividing by a Series matches every column with its population by LABEL, so
# the result no longer depends on the dict and the columns being in the same
# order.
population = pd.Series(population_estimates)
infection_rates = covid_monthly.div(population, axis='columns') * 100000

# First and last month of the vaccination data, as monthly periods
vaccination_start = pd.Period('2020-12', freq='M')
vaccination_end = pd.Period('2021-09', freq='M')

# Plotting the line plot
fig, ax = plt.subplots(figsize=(12, 8))

# One Viridis shade per age group
colors = plt.cm.viridis(np.linspace(0, 1, len(infection_rates.columns)))

# The monthly periods as timestamps (start of each month) for the x-axis
month_starts = infection_rates.index.to_timestamp()
for i, age_group in enumerate(infection_rates.columns):
    ax.plot(month_starts, infection_rates[age_group], label=age_group, color=colors[i])

# Dashed grey markers for the vaccination period. The x-values come straight
# from the periods, so they stay correct even if a month is missing from the
# table.
start_date = vaccination_start.to_timestamp()
end_date = vaccination_end.to_timestamp()
for marker_date in (start_date, end_date):
    ax.axvline(x=marker_date, color='grey', linestyle='--', linewidth=1)

# Captions for the markers, placed halfway up the y-axis
y_middle = ax.get_ylim()[1] / 2
ax.text(start_date, y_middle, 'Vaccination Start\n2020-12', color='grey', ha='center', rotation=90)
ax.text(end_date, y_middle, 'Vaccination End\n2021-09', color='grey', ha='center', rotation=90)

# Add labels and formatting
ax.set_title('COVID-19 Infection Rates by Age Group Over Time')
ax.set_xlabel('Month')
ax.set_ylabel('Infection Rate (per 100,000 population)')
ax.legend(title="Age Groups", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)

# Remove top, right, and left spines
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)

# Only show ticks on bottom and left axis
ax.tick_params(axis='x', bottom=True, top=False)
ax.tick_params(axis='y', left=True, right=False)

# Show plot with adjustments
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title COVID-19 Infection Rates (per 1,000 population) by Age Group and Month
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Convert 'date' to datetime if it's not already
covid['date'] = pd.to_datetime(covid['date'])

# Short chart label for every age-group column
age_labels = {
    'confirmed_age_group_0_9_years': '0-9',
    'confirmed_age_group_10_19_years': '10-19',
    'confirmed_age_group_20_29_years': '20-29',
    'confirmed_age_group_30_39_years': '30-39',
    'confirmed_age_group_40_49_years': '40-49',
    'confirmed_age_group_50_59_years': '50-59',
    'confirmed_age_group_60_69_years': '60-69',
    'confirmed_age_group_70_and_older': '70+'
}
# the original column names, in the same order as the labels
age_columns = list(age_labels)
# has no effect if the columns already carry the short labels
covid = covid.rename(columns=age_labels)

# Sum the daily counts per calendar month
covid['month'] = covid['date'].dt.to_period('M')
covid_monthly = covid.groupby('month')[list(age_labels.values())].sum()

# Define population estimates for 2022 based on provided data
population_estimates = {
    '0-9': 200109,
    '10-19': 192730,
    '20-29': 180063,
    '30-39': 268339,
    '40-49': 250110,
    '50-59': 257933,
    '60-69': 247103,
    '70+': 279370
}

# Calculate infection rates per 1,000 for each age group.
# Dividing by a Series matches every column with its population by label
# rather than by position.
population = pd.Series(population_estimates)
infection_rates = covid_monthly.div(population, axis='columns') * 1000

# Reset index so 'month' becomes a regular column
infection_rates = infection_rates.reset_index()

# Long format: one row per (month, age group)
heatmap_data = infection_rates.melt(id_vars='month', var_name='Age Group', value_name='Infection Rate')

# Wide format for the heatmap: age groups as rows, months as columns
heatmap_pivot = heatmap_data.pivot(index='Age Group', columns='month', values='Infection Rate')

# Fill NaN values with 0
heatmap_pivot = heatmap_pivot.fillna(0)

# Min-max scale every month (column) separately, so the colours compare age
# groups within a month. A month where all groups share one value scales to NaN.
normed_heatmap = (heatmap_pivot - heatmap_pivot.min()) / (heatmap_pivot.max() - heatmap_pivot.min())

# Keep October 2020 onwards; the slice is open-ended so it follows whatever
# the last month in the data is. Both tables are cut identically so colours
# and annotations line up.
normed_heatmap = normed_heatmap.loc[:, '2020-10':]
heatmap_pivot_filtered = heatmap_pivot.loc[:, '2020-10':]

# Large figure for better visibility
fig, ax = plt.subplots(figsize=(14, 10))

# Colours come from the normalised rates; the numbers printed in the cells are
# the original rates per 1,000, truncated to whole numbers
sns.heatmap(normed_heatmap, cmap='viridis', cbar_kws={'label': 'Normalized Infection Rate', 'shrink': 0.5},
            annot=heatmap_pivot_filtered.astype(int), fmt='d',
            linewidths=0.5, linecolor='white', square=True, ax=ax)

# Year-month tick labels. Heatmap cell i spans [i, i + 1] on the x-axis, so a
# tick has to sit at i + 0.5 to be centred under its column.
tick_centers = [position + 0.5 for position in range(len(normed_heatmap.columns))]
plt.xticks(ticks=tick_centers, labels=[str(month)[:7] for month in normed_heatmap.columns], rotation=45)

# Set title and labels
ax.set_title('COVID-19 Infection Rates (per 1,000 population) by Age Group and Month')
ax.set_xlabel('Month')
ax.set_ylabel('Age Group')

# Show the plot
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Evaluate if the mortality rate and incidence of severe cases decreased after a substantial portion of the population was vaccinated
# Compare the number of deaths between vaccinated and unvaccinated individuals over time
In [ ]:

In [ ]:
# Analyze the number of cases among vaccinated versus unvaccinated people
# how often did breakthrough cases occur, and in what age groups or time periods?
In [ ]:
# @title Breakthrough Cases (Vaccinated) Over Time
import pandas as pd
import matplotlib.pyplot as plt

# Make sure 'date' is a datetime column
covid['date'] = pd.to_datetime(covid['date'])

# Keep observations from 2021-09-01 onwards
covid_filtered = covid[covid['date'] >= '2021-09-01']

# Confirmed cases among vaccinated people, summed per date
breakthrough_cases = covid_filtered.groupby('date')['confirmed_covid19_cases_vaccinated'].sum()

# Create this chart's own figure and axes, so the styling below applies to
# this plot and not to axes left over from an earlier cell
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(breakthrough_cases.index, breakthrough_cases,
        label='Breakthrough Cases (Vaccinated)', color=viridis_colors[1])

# Add labels, title and legend
ax.set_xlabel('Date')
ax.set_ylabel('Number of Cases')
ax.set_title('Breakthrough Cases (Vaccinated) Over Time')
ax.legend()

# Customize the chart by hiding all spines except the bottom
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_visible(True)

plt.show()
No description has been provided for this image
In [ ]:
# Vaccination uptake by demographic factors
# Explore gender-based vaccination uptake to identify significant discrepancies between male and female vaccination rates and timing
# Vaccination patterns in high-risk groups (older adults and people with chronic conditions) to understand prioritisation
In [ ]:
# @title Number of Vaccinated Persons by Vaccination Priority Group and Month
import matplotlib.pyplot as plt
import seaborn as sns

# Latvian vaccination indication -> English caption
translation_dict = {
    "Persona ar hroniskām slimībām": "Person with chronic illnesses",
    "Persona vecumā virs 60 gadiem": "Person aged over 60 years",
    "Ārstniecības persona": "Medical professional",
    "Cita paaugstinātā riska grupa": "Other high-risk group",
    "Izglītības iestāžu darbinieks": "Educational institution worker",
    "Cits iedzīvotājs": "Other resident",
    "Ārstniecības iestādes darbinieks": "Healthcare facility worker",
    "Kontakts ar personām ar hroniskām slimībām": "Contact with chronic patients",
    "Operatīvo dienestu darbinieks": "Emergency services worker",
    "Citas veselības indikācijas": "Other health indications",
    "Pēc paša vēlēšanās": "By personal choice",
    "Nozaru prioritāro iestāžu darbinieks": "Sector priority institution worker",
    "Grūtniece": "Pregnant person",
    "Vakcinēts ārzemēs": "Vaccinated abroad",
    "Plānveida vakcinācija (pēc vakcinācijas kalendāra)": "Scheduled vaccination (per vaccination calendar)",
    "Imūnsupresīva persona": "Immunosuppressed person",
    "SAC darbinieks": "SAC worker",
    "Ieslodzījumu vietu pārvaldes personāls": "Prison staff",
    "SAC klients": "SAC client",
    "Ieslodzītais": "Prisoner",
    "Epidemioloģiskās indikācijas - kontakts ar infekcijas slimnieku": "Epidemiological indications - contact with infectious person",
    "Speciālo iestāžu klients": "Special institution client",
    "Epidemioloģiskās indikācijas - uzliesmojums, epidēmija": "Epidemiological indications - outbreak, epidemic",
    "Pirms ceļojuma": "Before travel",
    "Arodinfekciju profilakse": "Occupational infection prevention",
    "Veselības indikācijas - trauma, zarnu trakta operācija": "Health indications - trauma, gastrointestinal surgery"
}

# Total vaccinated persons per (indication, month), ordered by month and,
# within a month, from the largest total to the smallest
grouped = (
    vaccination
    .groupby(['vaccination_indications', 'month'])['number_of_vaccinated_persons']
    .sum()
    .reset_index()
    .sort_values(by=['month', 'number_of_vaccinated_persons'], ascending=[True, False])
)

# Swap the Latvian indication names for their English captions
grouped['vaccination_indications'] = grouped['vaccination_indications'].replace(translation_dict)

# Indications ranked by their overall total, largest first
indication_order = (
    grouped
    .groupby("vaccination_indications")['number_of_vaccinated_persons']
    .sum()
    .sort_values(ascending=False)
    .index
)

# Indications as rows and months as columns, with the rows in ranked order
heatmap_data = grouped.pivot(index="vaccination_indications", columns="month", values="number_of_vaccinated_persons")
heatmap_data = heatmap_data.reindex(indication_order)

# Annotated heatmap of the monthly totals
fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(
    heatmap_data,
    annot=True,
    fmt=".0f",
    cmap="YlGnBu",
    cbar_kws={'label': 'Number of Vaccinated Persons'},
    ax=ax,
)
ax.set_title("Number of Vaccinated Persons by Vaccination Indications and Month")
ax.set_xlabel("Month")
ax.set_ylabel("Vaccination Indications")
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image
In [ ]:
# @title Monthly Vaccinations by Priority Group for Top Vaccination Indications by Age Group

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Works on the full 'vaccination' DataFrame.
# Step 1: Monthly totals per (indication, month, age group). observed=True keeps only
# the category combinations that actually occur in the data.
grouped = vaccination.groupby(['vaccination_indications', 'month', 'age_category'], observed=True)['number_of_vaccinated_persons'].sum().reset_index()

# Translate vaccination indications to English
grouped['vaccination_indications'] = grouped['vaccination_indications'].replace(translation_dict)

# Step 2: The five indications with the largest overall totals
top_indications = grouped.groupby('vaccination_indications')['number_of_vaccinated_persons'].sum().nlargest(5).index

# Step 3: Keep only rows for those indications (copy so later edits don't touch 'grouped')
filtered_data = grouped[grouped['vaccination_indications'].isin(top_indications)].copy()

# Step 4: One subplot per age group, in ascending label order
age_groups = sorted(filtered_data['age_category'].unique())
num_age_groups = len(age_groups)

# Adjusting plot size for readability
plt.figure(figsize=(18, 14))
palette = sns.color_palette("viridis", len(top_indications))

# Pin one color to each indication. Slicing the palette by position would shift the
# colors whenever an age group lacks one of the indications, so the same indication
# would be drawn in different colors from one subplot to the next.
indication_colors = dict(zip(top_indications, palette))

# Plot each age group in a subplot
for i, age_group in enumerate(age_groups):
    plt.subplot((num_age_groups + 1) // 2, 2, i + 1)  # Two-column grid, rounded up
    age_data = filtered_data[filtered_data['age_category'] == age_group]

    # Months as rows, indications as columns; missing combinations count as 0
    pivot_data = age_data.pivot(index='month', columns='vaccination_indications', values='number_of_vaccinated_persons').fillna(0)

    # Keep the top indications present for this age group, in ranking order
    relevant_indications = [ind for ind in top_indications if ind in pivot_data.columns]
    pivot_data = pivot_data[relevant_indications]

    # Stacked bars, colored through the fixed indication -> color mapping
    pivot_data.plot(
        kind='bar',
        stacked=True,
        ax=plt.gca(),
        color=[indication_colors[ind] for ind in relevant_indications],
    )

    plt.title(f"Monthly Vaccinations by Priority Group for Age Group: {age_group}")
    plt.xlabel("Month")
    plt.ylabel("No. of Vaccinated Persons")  # Abbreviated y-axis label for clarity
    plt.xticks(rotation=45)
    plt.legend(title="Priority Group", loc='upper left', fontsize='small')

plt.tight_layout()
plt.suptitle("Monthly Vaccinations for Top Vaccination Priority Groups by Age Group", y=1.02)
plt.show()
No description has been provided for this image
In [ ]:
# @title Cumulative Vaccination Counts Over Time by Age Group for Each Vaccination Stage

import matplotlib.pyplot as plt
import pandas as pd

# Adjusted population estimates with a combined "0-19" group.
# Reference only: nothing in this cell reads this dictionary.
population_estimates = {
    '0-19': 200109 + 192730,  # Combined population for 0-19 age group
    '20-29': 180063,
    '30-39': 268339,
    '40-49': 250110,
    '50-59': 257933,
    '60-69': 247103,
    '70+': 279370
}

# Bring 'month' to datetime64 so it can be compared with the Timestamp bounds below.
# A period[M] column is converted with to_timestamp() (first day of the month); any
# other dtype goes through pd.to_datetime, with unparseable values coerced to NaT.
if vaccination['month'].dtype == 'period[M]':
    vaccination['month'] = vaccination['month'].dt.to_timestamp()
else:
    vaccination['month'] = pd.to_datetime(vaccination['month'], errors='coerce')

# English titles for the stages that get a subplot
stage_translations = {
    '1.pote': 'First Dose',
    '2.pote': 'Second Dose',
    '3.pote': 'Third Dose'
}

# Only stages that have a translation are plotted (this excludes "1.balstvakcinācija")
stages = [stage for stage in vaccination['vaccination_stage'].unique() if stage in stage_translations]
num_stages = len(stages)

# Define a common x-axis range for all plots
common_start_date = pd.to_datetime("2020-12")
common_end_date = pd.to_datetime("2021-09")

# One subplot per stage, stacked vertically. squeeze=False always returns a 2-D array,
# so indexing by position also works when only a single stage is present.
fig, axes = plt.subplots(num_stages, 1, figsize=(14, num_stages * 4), sharex=False, squeeze=False)
axes = axes.ravel()

for i, stage in enumerate(stages):
    # Filter data for the current stage and date range
    stage_data = vaccination[(vaccination['vaccination_stage'] == stage) &
                             (vaccination['month'] >= common_start_date) &
                             (vaccination['month'] <= common_end_date)]

    if stage_data.empty:
        print(f"No data available for stage {stage} in the specified date range.")
        continue

    # Monthly vaccinations per age group
    stage_summary = stage_data.groupby(['age_category', 'month'], observed=True)['number_of_vaccinated_persons'].sum().reset_index()

    # Months as rows, age groups as columns; a month without records counts as 0 doses
    monthly_counts = stage_summary.pivot(index='month', columns='age_category', values='number_of_vaccinated_persons').fillna(0)

    # Accumulate AFTER pivoting: the running total then carries forward through months
    # with no records instead of dropping back to 0 for that month.
    pivot_data = monthly_counts.cumsum()

    # Plot stacked area chart in the subplot
    pivot_data.plot(kind='area', stacked=True, colormap='viridis', alpha=0.7, ax=axes[i])

    # Set titles and labels for each subplot using English translations
    axes[i].set_title(f"Cumulative Vaccination Counts for {stage_translations[stage]} Over Time by Age Group")
    axes[i].set_ylabel("Cumulative Vaccinations")
    axes[i].set_xlabel("Month")
    axes[i].set_xlim([common_start_date, common_end_date])
    axes[i].legend(title="Age Group", bbox_to_anchor=(1.05, 1), loc='upper left')
    axes[i].tick_params(axis='x', rotation=0)  # Set rotation to 0 for better readability

# Overall title
plt.suptitle("Cumulative Vaccination Counts Over Time by Age Group for Each Vaccination Stage", y=1.02)
plt.tight_layout(rect=[0, 0.04, 1, 0.96])
plt.show()
No description has been provided for this image
In [ ]:
# @title Monthly Vaccination Counts by Gender for 1st and 2nd Dose (with Cumulative Coverage in %)

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Population denominators for the coverage percentages, keyed by the gender
# code stored in 'vaccinated_person_gender'
population_estimates = {
    0: 869057,  # Male population
    1: 1006700  # Female population
}

# Calendar month (monthly period) of every vaccination record;
# dates that cannot be parsed become NaT
_parsed_vaccination_dates = pd.to_datetime(vaccination['vaccination_date'], errors='coerce')
vaccination['month'] = _parsed_vaccination_dates.dt.to_period('M')

# Split out the 1st- and 2nd-dose records
first_dose_data, second_dose_data = (
    vaccination[vaccination['vaccination_stage'] == stage_code]
    for stage_code in ('1.pote', '2.pote')
)

# Function to prepare data for plotting
def prepare_data(dose_data, population_estimates):
    """Build the monthly count matrix and annotation labels for a gender heatmap.

    Args:
        dose_data: DataFrame with 'month', 'vaccinated_person_gender' and
            'number_of_vaccinated_persons' columns.
        population_estimates: Mapping from each gender code found in
            'vaccinated_person_gender' to the population size used as the
            coverage denominator.

    Returns:
        A ``(heatmap_data, annotations)`` tuple of DataFrames indexed by gender
        code with one column per month. ``heatmap_data`` holds the monthly
        vaccination counts (0 where a gender has no records for a month);
        ``annotations`` holds strings of the form ``"<count>\\n(<coverage>%)"``
        where coverage is the cumulative share of the population, in per cent.

    Raises:
        KeyError: If a gender code in ``dose_data`` has no population estimate.
    """
    # Monthly totals per gender
    summary = dose_data.groupby(['month', 'vaccinated_person_gender'])['number_of_vaccinated_persons'].sum().reset_index()

    # Genders as rows, months as columns; a month without records counts as 0 doses
    heatmap_data = summary.pivot(index='vaccinated_person_gender', columns='month', values='number_of_vaccinated_persons').fillna(0)

    missing = [gender for gender in heatmap_data.index if gender not in population_estimates]
    if missing:
        raise KeyError(f"No population estimate for gender code(s): {missing}")

    # Accumulate along the month axis of the completed grid, so the running total
    # (and hence the coverage) carries forward through months without records
    # instead of being reported as 0% for that month.
    cumulative_vaccinated = heatmap_data.cumsum(axis=1)
    populations = [population_estimates[gender] for gender in heatmap_data.index]
    cumulative_coverage_data = cumulative_vaccinated.div(populations, axis=0) * 100

    # Cell label: monthly count on the first line, cumulative coverage in parentheses below
    annotations = heatmap_data.astype(int).astype(str) + "\n(" + cumulative_coverage_data.round(2).astype(str) + "%)"

    return heatmap_data, annotations

# Count matrices and annotation labels for each dose
first_dose_heatmap_data, first_dose_annotations = prepare_data(first_dose_data, population_estimates)
second_dose_heatmap_data, second_dose_annotations = prepare_data(second_dose_data, population_estimates)

# Two stacked panels: 1st dose on top, 2nd dose below
fig, axes = plt.subplots(2, 1, figsize=(14, 12))

_dose_panels = (
    ("1st", first_dose_heatmap_data, first_dose_annotations),
    ("2nd", second_dose_heatmap_data, second_dose_annotations),
)
for _panel_ax, (_dose_label, _counts, _labels) in zip(axes, _dose_panels):
    # Color encodes the monthly count; the text in each cell comes from the annotations
    sns.heatmap(
        _counts,
        cmap="viridis",
        annot=_labels,
        fmt="",
        ax=_panel_ax,
        cbar_kws={'label': 'Number of Vaccinated Persons'},
    )
    _panel_ax.set_title(f"Monthly {_dose_label} Dose Vaccination Counts by Gender (with Cumulative Coverage in %)")
    _panel_ax.set_xlabel("Month")
    _panel_ax.set_ylabel("Gender")
    _panel_ax.set_xticklabels(_counts.columns.astype(str), rotation=45)
    # Labels are positional: the first row is shown as "Male", the second as "Female"
    _panel_ax.set_yticklabels(["Male", "Female"], rotation=0)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title Monthly Vaccination Counts by Age Group for 1st Dose (Male and Female, with Cumulative Coverage in %)

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Population denominators per age category and gender; each figure is the sum of the
# finer-grained estimates that fall into that category
population_estimates_age_gender = {
    '0-19': {'male': 48494 + 55030 + 51205 + 47664, 'female': 45296 + 51289 + 48678 + 45183},
    '20-29': {'male': 42739 + 50490, 'female': 40144 + 46690},
    '30-39': {'male': 68879 + 69622, 'female': 64057 + 65781},
    '40-49': {'male': 61158 + 62894, 'female': 60805 + 65253},
    '50-59': {'male': 61275 + 59520, 'female': 67428 + 69710},
    '60-69': {'male': 59254 + 45217, 'female': 75596 + 67036},
    '70+': {'male': 34981 + 21970, 'female': 61032 + 48034}
}

# Calendar month (monthly period) of every vaccination record;
# dates that cannot be parsed become NaT
_vaccination_dates = pd.to_datetime(vaccination['vaccination_date'], errors='coerce')
vaccination['month'] = _vaccination_dates.dt.to_period('M')

# Keep the 1st-dose records only
_is_first_dose = vaccination['vaccination_stage'] == '1.pote'
first_dose_data = vaccination[_is_first_dose]

# Function to prepare data for plotting by age group and gender
def prepare_data_age_group(dose_data, gender_label, population_estimates=None):
    """Build the monthly count matrix and annotation labels for an age-group heatmap.

    Args:
        dose_data: DataFrame with 'month', 'age_category',
            'vaccinated_person_gender' and 'number_of_vaccinated_persons' columns.
        gender_label: 'male' selects gender code 0; any other value selects
            code 1. The label is also the key used to look up the population
            of each age category.
        population_estimates: Optional mapping
            ``{age_category: {gender_label: population}}``. Defaults to the
            notebook-level ``population_estimates_age_gender``.

    Returns:
        A ``(heatmap_data, annotations)`` tuple of DataFrames indexed by age
        category with one column per month. ``heatmap_data`` holds the monthly
        vaccination counts (0 where an age group has no records for a month);
        ``annotations`` holds strings of the form ``"<count>\\n(<coverage>%)"``
        where coverage is the cumulative share of the population, in per cent.

    Raises:
        KeyError: If an age category in the data, or ``gender_label``, has no
            population estimate.
    """
    if population_estimates is None:
        population_estimates = population_estimates_age_gender

    # Filter by gender
    gender_code = 0 if gender_label == 'male' else 1
    data = dose_data[dose_data['vaccinated_person_gender'] == gender_code]

    # Monthly totals per age category
    summary = data.groupby(['month', 'age_category'], observed=True)['number_of_vaccinated_persons'].sum().reset_index()

    # Age categories as rows, months as columns; a month without records counts as 0 doses
    heatmap_data = summary.pivot(index='age_category', columns='month', values='number_of_vaccinated_persons').fillna(0)

    missing = [age for age in heatmap_data.index if age not in population_estimates]
    if missing:
        raise KeyError(f"No population estimate for age category(ies): {missing}")

    # Accumulate along the month axis of the completed grid, so the running total
    # (and hence the coverage) carries forward through months without records
    # instead of being reported as 0% for that month.
    cumulative_vaccinated = heatmap_data.cumsum(axis=1)
    populations = [population_estimates[age][gender_label] for age in heatmap_data.index]
    cumulative_coverage_data = cumulative_vaccinated.div(populations, axis=0) * 100

    # Cell label: monthly count on the first line, cumulative coverage in parentheses below
    annotations = heatmap_data.astype(int).astype(str) + "\n(" + cumulative_coverage_data.round(2).astype(str) + "%)"

    return heatmap_data, annotations

# Count matrices and annotation labels for 1st doses, one pair per gender
male_first_dose_heatmap_data, male_first_dose_annotations = prepare_data_age_group(first_dose_data, 'male')
female_first_dose_heatmap_data, female_first_dose_annotations = prepare_data_age_group(first_dose_data, 'female')

# Two stacked panels: males on top, females below
fig, axes = plt.subplots(2, 1, figsize=(14, 14))

_gender_panels = (
    ("Males", male_first_dose_heatmap_data, male_first_dose_annotations),
    ("Females", female_first_dose_heatmap_data, female_first_dose_annotations),
)
for _panel_ax, (_gender_title, _counts, _labels) in zip(axes, _gender_panels):
    # Color encodes the monthly count; the text in each cell comes from the annotations
    sns.heatmap(
        _counts,
        cmap="viridis",
        annot=_labels,
        fmt="",
        ax=_panel_ax,
        cbar_kws={'label': 'Number of Vaccinated Persons'},
    )
    _panel_ax.set_title(f"Monthly 1st Dose Vaccination Counts by Age Group ({_gender_title}, with Cumulative Coverage in %)")
    _panel_ax.set_xlabel("Month")
    _panel_ax.set_ylabel("Age Group")
    _panel_ax.set_xticklabels(_counts.columns.astype(str), rotation=45)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Explore vaccine dosage and preparation efficiency
# Study the doses and types of vaccine administered over time
# Did certain periods show a preference for one vaccine preparation over another, or shifts in dosage amounts?
In [ ]:
# @title Monthly Number of Vaccinated Persons by Preparation Type

import matplotlib.pyplot as plt
from matplotlib.cm import viridis
import pandas as pd

# Technical preparation names -> the names the vaccines are commonly known by
preparation_mapping = {
    "BBIBP-CorV (Sinopharm)": "Sinopharm",
    "Comirnaty": "Pfizer-BioNTech",
    "CoronaVac(Sinovac)": "Sinovac",
    "Covshield(ChAdOx1_nCov-19)": "AstraZeneca",
    "Jcovden": "J&J Janssen",
    "Spikevax": "Moderna",
    "Vaxzevria": "AstraZeneca"
}
# Relabel the 'preparation' column in place using the mapping above
vaccination['preparation'] = vaccination['preparation'].replace(preparation_mapping)

# Monthly period of each record, derived from 'vaccination_date' (unparseable dates -> NaT)
_record_dates = pd.to_datetime(vaccination['vaccination_date'], errors='coerce')
vaccination['year_month'] = _record_dates.dt.to_period('M')

# Months as rows, preparations as columns, total vaccinated persons as values
monthly_vaccination_counts = (
    vaccination
    .groupby(['year_month', 'preparation'])['number_of_vaccinated_persons']
    .sum()
    .unstack(fill_value=0)
)

# One evenly spaced Viridis color per preparation
num_preparations = len(monthly_vaccination_counts.columns)
colors = list(map(viridis, (step / num_preparations for step in range(num_preparations))))

# Wide, short figure for the stacked bars
fig, ax = plt.subplots(figsize=(14, 6))
monthly_vaccination_counts.plot(kind='bar', stacked=True, color=colors, width=0.8, ax=ax)

# Titles and axis labels
ax.set(
    title='Monthly Number of Vaccinated Persons by Brand Vaccine Names',
    xlabel='Year-Month',
    ylabel='Number of Vaccinated Persons',
)

# One tick per month, labelled with the period
ax.set_xticks(range(len(monthly_vaccination_counts.index)))
ax.set_xticklabels([str(period) for period in monthly_vaccination_counts.index], rotation=45)

# Legend outside the plotting area, to the right
plt.legend(title='Known Vaccine Names', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small', title_fontsize='medium')

# Show only the bottom spine
for _side, _visible in (('top', False), ('right', False), ('left', False), ('bottom', True)):
    ax.spines[_side].set_visible(_visible)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title Proportional Stacked Bar Plot of Vaccine Choice Within Each Age Group

import matplotlib.pyplot as plt
import pandas as pd

# Relabel preparations with their commonly known names
# (uses 'preparation_mapping' defined in the previous cell)
vaccination['preparation'] = vaccination['preparation'].replace(preparation_mapping)

# Age categories as rows, preparations as columns, total vaccinated persons as values
age_vaccine_counts = (
    vaccination
    .groupby(['age_category', 'preparation'], observed=False)['number_of_vaccinated_persons']
    .sum()
    .unstack(fill_value=0)
)

# Share of each preparation within its age category (every row divided by its own total)
_age_group_totals = age_vaccine_counts.sum(axis=1)
age_vaccine_proportions = age_vaccine_counts.div(_age_group_totals, axis=0)

# One evenly spaced Viridis color per preparation
num_preparations = len(age_vaccine_proportions.columns)
colors = list(map(viridis, (step / num_preparations for step in range(num_preparations))))

# Stacked bars, one bar per age category
fig, ax = plt.subplots(figsize=(12, 8))
age_vaccine_proportions.plot(kind='bar', stacked=True, color=colors, ax=ax)

# Titles and axis labels
ax.set(
    title='Proportion of Vaccine Preparations by Age Category',
    xlabel='Age Category',
    ylabel='Proportion of Vaccinated Persons',
)

# Show the y-axis as percentages (1.0 == 100%) with ticks every 20% up to 100%
ax.yaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))
ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])

# Legend outside the plotting area, to the right
plt.legend(title='Vaccine Type', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small', title_fontsize='medium')

# Show only the bottom spine
for _side, _visible in (('top', False), ('right', False), ('left', False), ('bottom', True)):
    ax.spines[_side].set_visible(_visible)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title Monthly Vaccination Breakdown by Preparation Type for Each Age Category

import matplotlib.pyplot as plt
import pandas as pd

# Relabel preparations with their commonly known names
# (uses 'preparation_mapping' defined in an earlier cell)
vaccination['preparation'] = vaccination['preparation'].replace(preparation_mapping)

# Parse 'vaccination_date' in place and derive the monthly period of each record
vaccination['vaccination_date'] = pd.to_datetime(vaccination['vaccination_date'])
vaccination['month'] = vaccination['vaccination_date'].dt.to_period('M')

# Rows: (month, age_category); columns: preparation; values: total vaccinated persons.
# observed=False is passed explicitly to suppress the pandas warning.
monthly_age_vaccine_counts = (
    vaccination
    .groupby(['month', 'age_category', 'preparation'], observed=False)['number_of_vaccinated_persons']
    .sum()
    .unstack(fill_value=0)
)

# One subplot per age category, stacked vertically and sharing the x-axis
age_categories = monthly_age_vaccine_counts.index.get_level_values('age_category').unique()
num_age_categories = len(age_categories)

fig, axes = plt.subplots(num_age_categories, 1, figsize=(14, 4 * num_age_categories), sharex=True)

# One evenly spaced Viridis color per preparation
num_preparations = len(monthly_age_vaccine_counts.columns)
colors = list(map(viridis, (step / num_preparations for step in range(num_preparations))))

# Draw the stacked monthly bars for each age category
for ax, age_category in zip(axes, age_categories):
    # Slice out this age category; the remaining index is the month
    data = monthly_age_vaccine_counts.xs(age_category, level='age_category')
    data.plot(kind='bar', stacked=True, color=colors, ax=ax, width=0.8)

    # Per-subplot title, y label and legend
    ax.set_title(f'Age Category: {age_category}')
    ax.set_ylabel('Number of Vaccinated Persons')
    ax.legend(title='Vaccine Type', fontsize='small', title_fontsize='medium', loc='upper left')

    # Plain scalar tick formatting on the y-axis
    ax.get_yaxis().set_major_formatter(plt.matplotlib.ticker.ScalarFormatter())

    # Show only the bottom spine
    for _side, _visible in (('top', False), ('right', False), ('left', False), ('bottom', True)):
        ax.spines[_side].set_visible(_visible)

# Label the shared x-axis on the bottom subplot and tighten the layout
axes[-1].set_xlabel('Month')
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Hypothetical herd immunity thresholds
# Using estimates of herd immunity, model the level of immunity expected based on vaccination coverage and confirmed case recovery
# Was herd immunity approached at any point?
In [ ]:
# @title COVID-19 Vaccination Coverage for First Dose Visualization

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

# Works on the notebook-level 'vaccination' DataFrame

# Step 1: first-dose ('1.pote') records that have an age category
vaccination_first_dose = (
    vaccination[vaccination['vaccination_stage'] == '1.pote']
    .copy()
    .dropna(subset=['age_category'])
)
vaccination_first_dose['age_category'] = vaccination_first_dose['age_category'].astype(str)

# Also drop rows whose category turned into the literal string 'nan' after the cast
vaccination_first_dose = vaccination_first_dose[vaccination_first_dose['age_category'] != 'nan']

# Vaccinated persons per month and age category
vaccination_agg = (
    vaccination_first_dose
    .groupby(['month', 'age_category'])
    .agg(num_vaccinated=('number_of_vaccinated_persons', 'sum'))
    .reset_index()
)

# Population denominators per age category and gender
population_estimates_age_gender = {
    '0-19': {'male': 48494 + 55030 + 51205 + 47664, 'female': 45296 + 51289 + 48678 + 45183},
    '20-29': {'male': 42739 + 50490, 'female': 40144 + 46690},
    '30-39': {'male': 68879 + 69622, 'female': 64057 + 65781},
    '40-49': {'male': 61158 + 62894, 'female': 60805 + 65253},
    '50-59': {'male': 61275 + 59520, 'female': 67428 + 69710},
    '60-69': {'male': 59254 + 45217, 'female': 75596 + 67036},
    '70+': {'male': 34981 + 21970, 'female': 61032 + 48034}
}
# Both genders combined, per age category
population_totals = {
    age_label: sum(by_gender.values())
    for age_label, by_gender in population_estimates_age_gender.items()
}
vaccination_agg['estimated_population'] = vaccination_agg['age_category'].map(population_totals)

# Month as datetime, then the running total and coverage within each age category
vaccination_agg['month'] = pd.to_datetime(vaccination_agg['month'].astype(str))
vaccination_agg = vaccination_agg.sort_values(by=['age_category', 'month'])
vaccination_agg['cumulative_vaccinated'] = vaccination_agg.groupby('age_category')['num_vaccinated'].cumsum()
vaccination_agg['coverage_per_cent'] = (
    vaccination_agg['cumulative_vaccinated'].div(vaccination_agg['estimated_population']).mul(100)
)

# Columns needed for plotting
result_table = vaccination_agg[['month', 'age_category', 'coverage_per_cent']]

# Step 4: default palette with one Viridis color per age category
sns.set_palette(sns.color_palette("viridis", n_colors=result_table['age_category'].nunique()))

fig, ax1 = plt.subplots(figsize=(12, 8))

# One coverage line per age category, on a 0-100% scale
sns.lineplot(data=result_table, x='month', y='coverage_per_cent', hue='age_category', marker='o', ax=ax1)
ax1.set_ylim(0, 100)

# Dashed reference lines at the hypothetical herd-immunity thresholds; each label sits
# 10 days after the first month and 1 percentage point above its line
herd_immunity_thresholds = [60, 70, 85]
_threshold_label_x = result_table['month'].min() + pd.Timedelta(days=10)
for threshold in herd_immunity_thresholds:
    ax1.axhline(y=threshold, color='gray', linestyle='--', linewidth=1)
    ax1.text(_threshold_label_x, threshold + 1, f'{threshold}%', color='gray')

# Titles, labels, legend outside the plot area, and YYYY-MM tick labels
ax1.set_title('COVID-19 Vaccination Coverage for First Dose by Age Category')
ax1.set_xlabel('Month')
ax1.set_ylabel('Vaccination Coverage (%)')
ax1.legend(title='Age Category', loc='upper left', bbox_to_anchor=(1, 1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# No gridlines
ax1.grid(False)

# Hide every spine except the bottom one
for spine in ('top', 'right', 'left'):
    ax1.spines[spine].set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title COVID-19 Vaccination Coverage for Second Dose Visualization

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

# Works on the notebook-level 'vaccination' DataFrame

# Step 1: second-dose ('2.pote') records that have an age category
vaccination_second_dose = (
    vaccination[vaccination['vaccination_stage'] == '2.pote']
    .copy()
    .dropna(subset=['age_category'])
)
vaccination_second_dose['age_category'] = vaccination_second_dose['age_category'].astype(str)

# Also drop rows whose category turned into the literal string 'nan' after the cast
vaccination_second_dose = vaccination_second_dose[vaccination_second_dose['age_category'] != 'nan']

# Vaccinated persons per month and age category
vaccination_agg = (
    vaccination_second_dose
    .groupby(['month', 'age_category'])
    .agg(num_vaccinated=('number_of_vaccinated_persons', 'sum'))
    .reset_index()
)

# Population denominators per age category and gender
population_estimates_age_gender = {
    '0-19': {'male': 48494 + 55030 + 51205 + 47664, 'female': 45296 + 51289 + 48678 + 45183},
    '20-29': {'male': 42739 + 50490, 'female': 40144 + 46690},
    '30-39': {'male': 68879 + 69622, 'female': 64057 + 65781},
    '40-49': {'male': 61158 + 62894, 'female': 60805 + 65253},
    '50-59': {'male': 61275 + 59520, 'female': 67428 + 69710},
    '60-69': {'male': 59254 + 45217, 'female': 75596 + 67036},
    '70+': {'male': 34981 + 21970, 'female': 61032 + 48034}
}
# Both genders combined, per age category
population_totals = {
    age_label: sum(by_gender.values())
    for age_label, by_gender in population_estimates_age_gender.items()
}
vaccination_agg['estimated_population'] = vaccination_agg['age_category'].map(population_totals)

# Month as datetime, then the running total and coverage within each age category
vaccination_agg['month'] = pd.to_datetime(vaccination_agg['month'].astype(str))
vaccination_agg = vaccination_agg.sort_values(by=['age_category', 'month'])
vaccination_agg['cumulative_vaccinated'] = vaccination_agg.groupby('age_category')['num_vaccinated'].cumsum()
vaccination_agg['coverage_per_cent'] = (
    vaccination_agg['cumulative_vaccinated'].div(vaccination_agg['estimated_population']).mul(100)
)

# Columns needed for plotting
result_table = vaccination_agg[['month', 'age_category', 'coverage_per_cent']]

# Step 4: default palette with one Viridis color per age category
sns.set_palette(sns.color_palette("viridis", n_colors=result_table['age_category'].nunique()))

fig, ax1 = plt.subplots(figsize=(12, 8))

# One coverage line per age category, on a 0-100% scale
sns.lineplot(data=result_table, x='month', y='coverage_per_cent', hue='age_category', marker='o', ax=ax1)
ax1.set_ylim(0, 100)

# Dashed reference lines at the hypothetical herd-immunity thresholds; each label sits
# 10 days after the first month and 1 percentage point above its line
herd_immunity_thresholds = [60, 70, 85]
_threshold_label_x = result_table['month'].min() + pd.Timedelta(days=10)
for threshold in herd_immunity_thresholds:
    ax1.axhline(y=threshold, color='gray', linestyle='--', linewidth=1)
    ax1.text(_threshold_label_x, threshold + 1, f'{threshold}%', color='gray')

# Titles, labels, legend outside the plot area, and YYYY-MM tick labels
ax1.set_title('COVID-19 Vaccination Coverage for Second Dose')
ax1.set_xlabel('Month')
ax1.set_ylabel('Vaccination Coverage (%)')
ax1.legend(title='Age Category', loc='upper left', bbox_to_anchor=(1, 1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# No gridlines
ax1.grid(False)

# Hide every spine except the bottom one
for spine in ('top', 'right', 'left'):
    ax1.spines[spine].set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title COVID-19 Vaccination Coverage for Second Dose with Recovery Index
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

# Step 1: Prepare output_table for recovery index
# Works on the notebook-level 'covid' DataFrame, which must already carry a
# 'cumulative_recovered_cases' column.

# Ensure 'date' is in datetime format (unparseable values become NaT)
covid['date'] = pd.to_datetime(covid['date'], errors='coerce')

# Extract year-month for grouping
covid['year_month'] = covid['date'].dt.to_period('M')

# Per month: the last reported cumulative recovered count and the number of
# newly confirmed cases
monthly_summary = covid.groupby('year_month').agg(
    cumulative_recovered_cases=('cumulative_recovered_cases', 'last'),
    total_cases=('confirmed_covid19_cases', 'sum')
).reset_index()

# 'year_month' is always period[M] here (it is built with dt.to_period('M') above), so
# convert unconditionally. Creating 'month' only inside a dtype check would leave the
# column missing on the other path and make the date filter below fail with a KeyError.
monthly_summary['month'] = monthly_summary['year_month'].dt.to_timestamp()

# Running total of confirmed cases
monthly_summary['cumulative_total_cases'] = monthly_summary['total_cases'].cumsum()

# Recovery index in per cent. Months where no case has been confirmed yet get NaN
# rather than a division by zero (which would produce inf or NaN).
_cumulative_cases = monthly_summary['cumulative_total_cases']
monthly_summary['recovery_index'] = (
    monthly_summary['cumulative_recovered_cases'] / _cumulative_cases.where(_cumulative_cases != 0)
) * 100

# Define the vaccination period
vaccination_period_start = pd.to_datetime("2020-12-01")
vaccination_period_end = pd.to_datetime("2021-09-30")

# Filter the table for the vaccination period
output_table = monthly_summary[(monthly_summary['month'] >= vaccination_period_start) &
                               (monthly_summary['month'] <= vaccination_period_end)].reset_index(drop=True)

# Step 2: Prepare vaccination coverage data for the same period
# Assuming 'vaccination' DataFrame contains 'vaccination_stage', 'month', 'age_category', and 'number_of_vaccinated_persons'

# Convert 'month' to datetime format if it is an object or Period format
# NOTE: this overwrites vaccination['month'] in place, so cells run after this one see
# timestamps instead of periods/strings. Any other dtype is left untouched.
if vaccination['month'].dtype == 'object':
    vaccination['month'] = pd.to_datetime(vaccination['month'], errors='coerce')
elif vaccination['month'].dtype == 'period[M]':
    vaccination['month'] = vaccination['month'].dt.to_timestamp()

# Filter the vaccination data for the second dose within the vaccination period
# (vaccination_period_start / vaccination_period_end are defined in Step 1 above)
vaccination_second_dose = vaccination[(vaccination['vaccination_stage'] == '2.pote') &
                                      (vaccination['month'] >= vaccination_period_start) &
                                      (vaccination['month'] <= vaccination_period_end)].copy()

# Remove NaNs in age_category and convert to string
vaccination_second_dose = vaccination_second_dose.dropna(subset=['age_category'])
vaccination_second_dose['age_category'] = vaccination_second_dose['age_category'].astype(str)

# Aggregate the number of vaccinated persons by age_category and month
vaccination_agg = vaccination_second_dose.groupby(['month', 'age_category']).agg(
    num_vaccinated=('number_of_vaccinated_persons', 'sum')
).reset_index()

# Define population estimates and map to age categories
# Each figure is a sum of finer-grained estimates per age category and gender.
population_estimates_age_gender = {
    '0-19': {'male': 48494 + 55030 + 51205 + 47664, 'female': 45296 + 51289 + 48678 + 45183},
    '20-29': {'male': 42739 + 50490, 'female': 40144 + 46690},
    '30-39': {'male': 68879 + 69622, 'female': 64057 + 65781},
    '40-49': {'male': 61158 + 62894, 'female': 60805 + 65253},
    '50-59': {'male': 61275 + 59520, 'female': 67428 + 69710},
    '60-69': {'male': 59254 + 45217, 'female': 75596 + 67036},
    '70+': {'male': 34981 + 21970, 'female': 61032 + 48034}
}
# Both genders combined per age category; categories missing from the mapping get NaN
population_totals = {age: sum(gender.values()) for age, gender in population_estimates_age_gender.items()}
vaccination_agg['estimated_population'] = vaccination_agg['age_category'].map(population_totals)

# Sort by age category and month so the running total accumulates chronologically,
# then calculate cumulative coverage (no dtype conversion happens here)
vaccination_agg = vaccination_agg.sort_values(by=['age_category', 'month'])
vaccination_agg['cumulative_vaccinated'] = vaccination_agg.groupby('age_category')['num_vaccinated'].cumsum()
vaccination_agg['coverage_per_cent'] = (vaccination_agg['cumulative_vaccinated'] / vaccination_agg['estimated_population']) * 100

# Prepare the vaccination coverage result table
result_table = vaccination_agg[['month', 'age_category', 'coverage_per_cent']]

# Step 3: Plot vaccination coverage and recovery index
# Reads 'result_table' (coverage per month and age category) and 'output_table'
# (monthly recovery index) prepared in the steps above.

# Initialize the plot
fig, ax1 = plt.subplots(figsize=(12, 8))

# Plot vaccination coverage for each age category on the primary y-axis using viridis color palette
sns.lineplot(data=result_table, x='month', y='coverage_per_cent', hue='age_category', marker='o', palette='viridis', ax=ax1)
ax1.set_ylim(0, 100)  # Set y-axis limits for coverage

# Dashed reference lines at the hypothetical herd-immunity thresholds; each label sits
# 10 days after the first month and 1 percentage point above its line
herd_immunity_thresholds = [60, 70, 85]
for threshold in herd_immunity_thresholds:
    ax1.axhline(y=threshold, color='gray', linestyle='--', linewidth=1)
    ax1.text(result_table['month'].min() + pd.Timedelta(days=10), threshold + 1, f'{threshold}%', color='gray')

# Customize the primary y-axis plot
ax1.set_title('COVID-19 Vaccination Coverage for Second Dose with Recovery Index')
ax1.set_xlabel('Month')
ax1.set_ylabel('Vaccination Coverage (%)')
# Move the legend further outside to avoid overlap with the secondary y-axis ticks
ax1.legend(title='Age Category', loc='upper left', bbox_to_anchor=(1.05, 1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# Rotate the month labels on ax1 itself. The twin axes created below becomes the
# current axes and its own x-axis is hidden, so a pyplot-level plt.xticks(rotation=45)
# issued after twinx() would rotate invisible labels and leave these ones unrotated.
ax1.tick_params(axis='x', rotation=45)

# Secondary y-axis for recovery index with a black dashed line
ax2 = ax1.twinx()
sns.lineplot(data=output_table, x='month', y='recovery_index', color='black', linestyle='--', marker='x', ax=ax2, label='Recovery Index (%)')
ax2.set_ylabel('Recovery Index (%)')
ax2.tick_params(axis='y')  # Default color for secondary y-axis ticks

# Remove top, left, and right spines on both axes
for spine in ['top', 'right', 'left']:
    ax1.spines[spine].set_visible(False)
    ax2.spines[spine].set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Effectiveness of Vaccination Facilities
# Are there any notable differences in COVID-19 case reductions in areas served by different vaccination facilities?
# Did certain facilities administer more vaccinations or reach particular demographics more effectively?
In [ ]:
# @title Vaccinated Persons by Facility and Age Category
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Keep FutureWarnings out of the cell output
warnings.filterwarnings("ignore", category=FutureWarning)

# Step 1: pick the 25 facility names with the most rows in `vaccination` and
# label each of their rows with the facility name up to its first comma;
# rows of every other facility get no label (None).
facility_names = vaccination['vaccination_facility_name']
top_25_facilities = facility_names.value_counts().nlargest(25).index
vaccination['facility_group'] = facility_names.map(
    lambda name: name.split(',')[0] if name in top_25_facilities else None
)

# Unlabelled rows (facilities outside the top 25) are left out of the heatmap
filtered_vaccination = vaccination.dropna(subset=['facility_group'])

# Step 2: vaccinated persons per facility group (rows) and age category (columns)
facility_age_data = (
    filtered_vaccination
    .groupby(['facility_group', 'age_category'], observed=True)['number_of_vaccinated_persons']
    .sum()
    .unstack(fill_value=0)
)

# Step 3: sort facilities by their row total (largest first); the helper
# column only exists for the sort and is dropped again afterwards.
facility_age_data = (
    facility_age_data
    .assign(Total_Vaccinated=facility_age_data.sum(axis=1))
    .sort_values('Total_Vaccinated', ascending=False)
    .drop(columns='Total_Vaccinated')
)

# Step 4: scale every row by its own maximum so colours compare age categories
# within a facility rather than facilities with each other.
row_maxima = facility_age_data.max(axis=1)
facility_age_data_normalized = facility_age_data.div(row_maxima, axis=0)

# Step 5: heatmap coloured by the row-normalized values, annotated with the raw counts
fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(
    facility_age_data_normalized,
    cmap="viridis",
    annot=facility_age_data,
    fmt="d",
    cbar_kws={'label': 'Normalized Vaccinated Persons'},
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Number of Vaccinated Persons by Facility and Age Category")
ax.set_xlabel("Age Category")
ax.set_ylabel("Vaccination Facility (Top 25, Ordered)")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# @title Vaccinated Persons by Facility and Month
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Keep FutureWarnings out of the cell output
warnings.filterwarnings("ignore", category=FutureWarning)

# Step 1: Select the 25 facility names with the most rows in `vaccination` and
# clean them (the group label is the facility name up to its first comma)
top_25_facilities = vaccination['vaccination_facility_name'].value_counts().nlargest(25).index
vaccination['facility_group'] = vaccination['vaccination_facility_name'].apply(
    lambda x: x.split(',')[0] if x in top_25_facilities else None
)

# Filter out rows where 'facility_group' is None (i.e., "Other" facilities)
filtered_vaccination = vaccination.dropna(subset=['facility_group'])

# Format the month as YYYY-MM for the heatmap columns. The labels are kept in a
# separate Series used only as a grouping key: overwriting vaccination['month']
# with strings would silently change the dtype of the shared frame for every
# cell executed after this one (hidden, execution-order-dependent state).
month_labels = (
    pd.to_datetime(filtered_vaccination['month'])
    .dt.strftime('%Y-%m')
    .rename('month')
)

# Step 2: Group by the cleaned facility name and formatted month to get the sum of vaccinated persons
facility_month_data = (
    filtered_vaccination
    .groupby(['facility_group', month_labels], observed=True)['number_of_vaccinated_persons']
    .sum()
    .unstack(fill_value=0)
)

# Step 3: Order facilities by total vaccinated persons in descending order
# (the helper column only exists for the sort)
facility_month_data['Total_Vaccinated'] = facility_month_data.sum(axis=1)
facility_month_data = facility_month_data.sort_values('Total_Vaccinated', ascending=False).drop(columns='Total_Vaccinated')

# Step 4: Normalize each row by its own maximum for row-wise color intensity
facility_month_data_normalized = facility_month_data.div(facility_month_data.max(axis=1), axis=0)

# Step 5: Plot the heatmap (viridis) colored by the row-normalized values and
# annotated with the raw counts
plt.figure(figsize=(14, 10))
sns.heatmap(facility_month_data_normalized, cmap="viridis", annot=facility_month_data, fmt="d",
            cbar_kws={'label': 'Normalized Vaccinated Persons'}, linewidths=.5)
plt.title("Number of Vaccinated Persons by Facility and Month")
plt.xlabel("Month")
plt.ylabel("Vaccination Facility (Top 25, Ordered)")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image

Forecasting COVID-19 Incidence Data¶

In [ ]:
# @title Evaluating Deep Learning Models for COVID-19 Daily Incidence Data Forecasting

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Dense, Input, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, explained_variance_score

# Configure model parameters
params = {
    "learning_rate": 0.0005,
    "epochs": 100,  # Fixed 100 epochs for training
    "hidden_units": 16,
    "sequence_length": 5,
    "train_fraction": 0.8,  # Share of the windows used for training (chronological split)
}

# Load and preprocess data
covid['date'] = pd.to_datetime(covid['date'])
# Fill gaps with the previous observation, then back-fill whatever is still
# missing at the start. ffill()/bfill() replace fillna(method=...), which is
# deprecated in pandas 2.x.
covid = covid.ffill().bfill()

data = covid[['confirmed_covid19_cases', 'recovered_cases', 'number_of_tests']]

# Fit the scaler on the training period only, so the min/max of the held-out
# test period do not leak into the features the models are trained on.
# A window starting at row i uses rows i .. i + sequence_length (label included),
# hence the training windows cover the first `train windows + sequence_length` rows.
n_windows = max(len(data) - params["sequence_length"], 0)
n_train_rows = int(params["train_fraction"] * n_windows) + params["sequence_length"]

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:n_train_rows])
# Test-period values outside the training range fall outside [0, 1]; that is expected.
data_scaled = pd.DataFrame(scaler.transform(data), columns=['confirmed', 'recovered', 'tests'])

# Helper function to create sequences
def create_sequences(data, sequence_length=5, target_col='confirmed'):
    """Turn a feature table into sliding windows and next-step labels.

    Window ``i`` holds rows ``i .. i + sequence_length - 1`` of every column;
    its label is the value of ``target_col`` in row ``i + sequence_length``.

    Args:
        data: DataFrame with one row per time step; must contain ``target_col``.
        sequence_length: Number of consecutive rows per window (positive int).
        target_col: Column whose next value is predicted. Defaults to
            ``'confirmed'``.

    Returns:
        Tuple ``(sequences, labels)`` where ``sequences`` has shape
        ``(n, sequence_length, n_columns)`` and ``labels`` has shape ``(n,)``
        with ``n = max(len(data) - sequence_length, 0)``. When the table is too
        short for a single window the arrays are empty but keep those ranks,
        so callers can still read ``sequences.shape[1:]``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    # Convert once instead of slicing the DataFrame on every iteration.
    values = data.to_numpy()
    targets = data[target_col].to_numpy()
    n_samples = len(data) - sequence_length

    if n_samples <= 0:
        empty_sequences = np.empty((0, sequence_length, values.shape[1]), dtype=values.dtype)
        return empty_sequences, np.empty((0,), dtype=targets.dtype)

    sequences = np.stack([values[start:start + sequence_length] for start in range(n_samples)])
    # Copy so the labels never alias the DataFrame's underlying buffer.
    labels = targets[sequence_length:].copy()
    return sequences, labels

# Prepare sequences
X, y = create_sequences(data_scaled, params["sequence_length"])

# Chronological split without shuffling: the first `train_fraction` of the
# windows is used for training, the rest is held out for testing.
# The fraction is read from `params` (default 0.8 = 80% train, 20% test) so it
# can be tuned next to the other hyper-parameters.
train_fraction = params.get("train_fraction", 0.8)
train_size = int(train_fraction * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Registries filled while training, all keyed by model name
models = {}        # fitted model objects
loss_history = {}  # per-epoch training loss
metrics = {}       # evaluation metrics on the test set
predictions = {}   # test-set predictions for later plotting

# Evaluation function: metrics on the original case scale, with an RMSLE guard
def evaluate_model(model, X_test, y_test, model_name):
    """Score ``model`` on the test windows and record the results.

    Predictions and targets are mapped back from the scaled space with the
    module-level ``scaler`` before any metric is computed.

    Args:
        model: Fitted model exposing ``predict``.
        X_test: Test windows passed to ``model.predict``.
        y_test: 1-D array of scaled targets aligned with ``X_test``.
        model_name: Key under which the results are stored.

    Side effects:
        Stores RMSE, MAE, MAPE, EV and RMSLE in ``metrics[model_name]`` and the
        inverse-transformed predictions in ``predictions[model_name]``.
    """
    predicted = model.predict(X_test)
    # A 3-D output is reduced to its last step along axis 1.
    if predicted.ndim == 3:
        predicted = predicted[:, -1, :]
    if predicted.shape[1] != 1:
        predicted = predicted.reshape(-1, 1)

    def _to_original_scale(scaled_target):
        """Undo the scaling of the target, which is the scaler's first column."""
        # The scaler was fitted on several columns, so the single target column
        # is padded with zeros up to that width; only column 0 is kept.
        padding = np.zeros((scaled_target.shape[0], scaler.n_features_in_ - 1))
        return scaler.inverse_transform(np.concatenate([scaled_target, padding], axis=1))[:, 0]

    predicted_cases = _to_original_scale(predicted)
    actual_cases = _to_original_scale(y_test.reshape(-1, 1))

    # Calculate standard metrics
    rmse = np.sqrt(mean_squared_error(actual_cases, predicted_cases))
    mae = mean_absolute_error(actual_cases, predicted_cases)
    mape = mean_absolute_percentage_error(actual_cases, predicted_cases)
    ev = explained_variance_score(actual_cases, predicted_cases)

    # RMSLE guard: log1p(0) is fine, but log1p is undefined for values <= -1,
    # so negative predictions are floored at zero. The floored copies are used
    # for RMSLE only and must not replace the arrays scored above.
    actual_nonneg = np.maximum(actual_cases, 0)
    predicted_nonneg = np.maximum(predicted_cases, 0)
    rmsle = np.sqrt(mean_squared_error(np.log1p(actual_nonneg), np.log1p(predicted_nonneg)))

    metrics[model_name] = {'RMSE': rmse, 'MAE': mae, 'MAPE': mape, 'EV': ev, 'RMSLE': rmsle}
    # Store exactly the predictions that RMSE/MAE/MAPE/EV were computed on
    predictions[model_name] = predicted_cases

# Build, fit and score a single recurrent model
def build_train_evaluate(model_name, layer):
    """Wrap ``layer`` in a small network, train it and record its results.

    The network is: input window -> ``layer`` -> ``Dense(1)``. It is trained on
    ``X_train``/``y_train`` with Adam and MSE loss for ``params["epochs"]``
    epochs, with the test split passed as validation data.

    Args:
        model_name: Key used in ``models``, ``loss_history`` and, through
            ``evaluate_model``, in ``metrics`` and ``predictions``.
        layer: Layer instance placed between the input and the output layer.
    """
    print(f"\nTraining {model_name} Model...")

    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    network = Sequential()
    network.add(Input(shape=(timesteps, n_features)))
    network.add(layer)
    network.add(Dense(1))

    optimizer = Adam(learning_rate=params["learning_rate"])
    network.compile(optimizer=optimizer, loss='mse')

    # Fixed number of epochs, no progress output
    fit_result = network.fit(
        x=X_train,
        y=y_train,
        batch_size=32,
        epochs=params["epochs"],
        validation_data=(X_test, y_test),
        verbose=0,
    )

    models[model_name] = network
    loss_history[model_name] = fit_result.history['loss']
    evaluate_model(network, X_test, y_test, model_name)

# Train, evaluate, and store metrics for RNN, LSTM, BiLSTM, and GRU models
from tensorflow.keras.utils import set_random_seed

# Layers are built lazily (one factory per model) so that each one is created
# after the seed has been set and therefore starts from reproducible weights.
layer_factories = {
    'RNN': lambda: SimpleRNN(params["hidden_units"], activation='relu'),
    'LSTM': lambda: LSTM(params["hidden_units"], activation='relu'),
    'BiLSTM': lambda: Bidirectional(LSTM(params["hidden_units"], activation='relu')),
    'GRU': lambda: GRU(params["hidden_units"], activation='relu'),
}

for model_name, make_layer in layer_factories.items():
    # Re-seed Python, NumPy and the backend before every model so results do
    # not depend on training order and survive Restart & Run All.
    set_random_seed(params.get("seed", 42))
    build_train_evaluate(model_name, make_layer())

### Training loss per epoch, all models on one chart ###
fig, ax = plt.subplots(figsize=(10, 6))
for model_name, train_loss in loss_history.items():
    ax.plot(train_loss, label=f'{model_name} Training Loss')

ax.set_title("Evaluating Deep Learning Models for COVID-19 Daily Incidence Data Forecasting\nTraining Loss Over Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Training Loss")
ax.legend()

# Minimal frame: only the bottom spine and the x tick marks stay visible
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='x', which='both', bottom=True, top=False)
ax.tick_params(axis='y', which='both', left=False, right=False)
plt.show()

### One bar chart per metric, value label just inside the end of each bar ###
metrics_df = pd.DataFrame(metrics).T  # rows = models, columns = metrics

metric_names = ['RMSE', 'MAE', 'MAPE', 'EV', 'RMSLE']
metric_titles = [
    'RMSE',
    'MAE',
    'MAPE',
    'Explained Variance',
    'RMSLE'
]

for metric, metric_title in zip(metric_names, metric_titles):
    fig, ax = plt.subplots(figsize=(8, 5))
    bars = ax.bar(metrics_df.index, metrics_df[metric], color='skyblue')

    # Label anchored 5% of the bar height short of the bar end, horizontally centred
    for bar in bars:
        yval = bar.get_height()
        label_x = bar.get_x() + bar.get_width()/2
        label_y = yval - yval*0.05
        ax.text(label_x, label_y, round(yval, 2), ha='center', va='top', color="black")

    ax.set_title(f"Model Evaluation - {metric_title}")
    ax.set_xlabel('Models')
    ax.set_ylabel(metric)

    # Minimal frame: only the bottom spine and the x tick marks stay visible
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='x', which='both', bottom=True, top=False)
    ax.tick_params(axis='y', which='both', left=False, right=False)
    plt.show()

### Test period: actual series against every model's predictions ###
fig, ax = plt.subplots(figsize=(14, 8))

# Bring the scaled test targets back to the original scale: the scaler expects
# three columns, so the target is padded with two zero columns and only
# column 0 of the result is kept.
scaled_target = y_test.reshape(-1, 1)
zero_padding = np.zeros((y_test.shape[0], 2))
actual_cases = scaler.inverse_transform(np.concatenate([scaled_target, zero_padding], axis=1))[:, 0]
ax.plot(actual_cases, label="Actual COVID-19 Cases", color="black", linewidth=2)

# One line per model, in the order the predictions were stored
for model_name, predicted_cases in predictions.items():
    ax.plot(predicted_cases, label=f"{model_name} Prediction")

ax.set_title("COVID-19 Cases: Actual vs. Model Predictions")
ax.set_xlabel("Time (days)")
ax.set_ylabel("COVID-19 Cases")
ax.legend()

# Keep the left and bottom spines with their tick marks; hide top and right
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='x', which='both', bottom=True, top=False)
ax.tick_params(axis='y', which='both', left=True, right=False)
plt.show()
Training RNN Model...
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step

Training LSTM Model...
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step

Training BiLSTM Model...
WARNING:tensorflow:5 out of the last 15 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x7826d51ed090> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step

Training GRU Model...
WARNING:tensorflow:5 out of the last 15 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x7826d4b20ca0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image